# Start from an empty workspace and fix the RNG seed; the seed makes the
# jitter() calls used for Figure B.4 reproducible.
rm(list=ls())
set.seed(12435)

###
### First validation exercise, based on full speeches
###

# load and inspect data
codesheet <- read.csv("validation1.csv")
head(codesheet)

# set up plot
# Every coded item exists twice, once with prefix "lae_" and once with prefix
# "tho_" (presumably the two human coders -- TODO confirm). The plotted value
# is the row-wise mean of the two columns.
codesheet$lwe_problem <- rowMeans(codesheet[, c("lae_lwe_problem", "tho_lwe_problem")])
codesheet$lwe_more <- rowMeans(codesheet[, c("lae_lwe_more", "tho_lwe_more")])
codesheet$rwe_problem <- rowMeans(codesheet[, c("lae_rwe_problem", "tho_rwe_problem")])
codesheet$rwe_more <- rowMeans(codesheet[, c("lae_rwe_more", "tho_rwe_more")])
codesheet$lwe_rwe_equal <- rowMeans(codesheet[, c("lae_lwe_rwe_equal", "tho_lwe_rwe_equal")])

# Side indicator used for the plotting symbol: negative log ratio -> "Left",
# otherwise "Right". The levels are fixed explicitly so that "Left" is always
# code 1 and "Right" always code 2, even if the data contain only one side;
# as.factor() would derive the levels from the data and could shift the codes.
# NOTE(review): every other reference in this script uses the "_2025" columns
# (log_ratio_2025, count_left_2025, count_right_2025). Confirm that `log_ratio`
# is the intended column here; if no column has exactly that name, `$` falls
# back to partial matching on the data frame.
codesheet$left <- factor(
  ifelse(codesheet$log_ratio < 0, "Left", "Right"),
  levels = c("Left", "Right")
)

###
### Figure B.4: Validity Check of Keyword Analysis in Speeches: Graphical Display
###
# Write the five-panel figure to FigB4.pdf.
pdf("FigB4.pdf", height = 10, width = 8)

# 3 x 11 layout grid: two panels in each of the first two rows (separated by
# one empty cell) and a single centred panel in the third row.
layout(rbind(
  c(rep(1, 5), 0, rep(2, 5)),
  c(rep(3, 5), 0, rep(4, 5)),
  c(rep(0, 3), rep(5, 5), rep(0, 3))
))

# One scatter panel per coded item, drawn in layout order. The y values are
# jittered; the plotting symbol is the factor code of `left` plus 15
# (code 1 -> pch 16, code 2 -> pch 17). The default y axis is suppressed and
# replaced by ticks at 0, 0.5 and 1.
invisible(Map(
  function(column, label) {
    plot(
      codesheet$log_ratio_2025,
      jitter(codesheet[[column]], 0.5),
      xlab = "Logged keyword ratio",
      ylab = label,
      pch = as.numeric(codesheet$left) + 15,
      yaxt = 'n'
    )
    axis(side = 2, at = c(0, 0.5, 1))
  },
  c("lwe_more", "rwe_more", "lwe_problem", "rwe_problem", "lwe_rwe_equal"),
  c("LWE More", "RWE More", "LWE Problem", "RWE Problem", "LWE RWE Equal")
))
dev.off()


###
### Table B.16: Validity Check of Keyword Analysis in Speeches
###
# Cross-tabulate the "lae_" version of a coded item against its "tho_"
# version within the given rows.
crosstab <- function(rows, item) {
  table(rows[[paste0("lae_", item)]], rows[[paste0("tho_", item)]])
}

# Speeches with more than 12 right keyword counts
right <- subset(codesheet, count_right_2025 > 12)
crosstab(right, "rwe_problem")
crosstab(right, "rwe_more")
crosstab(right, "lwe_rwe_equal")

# Speeches with more than 12 left keyword counts
left <- subset(codesheet, count_left_2025 > 12)
crosstab(left, "lwe_problem")
crosstab(left, "lwe_more")
crosstab(left, "lwe_rwe_equal")

# Speeches with more than 3 counts on both sides
mixed <- subset(codesheet, count_left_2025 > 3 & count_right_2025 > 3)
crosstab(mixed, "rwe_problem")
crosstab(mixed, "rwe_more")
crosstab(mixed, "lwe_problem")
crosstab(mixed, "lwe_more")
crosstab(mixed, "lwe_rwe_equal")



###
### Second validation exercise, based on speech segments surrounding keywords
###

# Clear everything left over from the previous exercise.
rm(list=ls())

# load and inspect data
coded <- read.csv("validation2.csv")
head(coded)

# set up data for analysis
# The factor levels fix the row order of the tables below. Any value of
# `abbreviation` that is not one of these six labels becomes NA and is
# therefore left out of the aggregates.
coded$abbreviation <- factor(
  coded$abbreviation,
  levels = c("DIE LINKE.", "Grüne", "SPD", "FDP", "CDU/CSU", "AfD")
)
coded_rw <- subset(coded, type_kw == "right")
coded_lw <- subset(coded, type_kw == "left")

# Number of coded segments per party (sum of a constant 1).
coded_lw$one <- 1
coded_rw$one <- 1
lw_N <- aggregate(one ~ abbreviation, FUN = sum, data = coded_lw)
rw_N <- aggregate(one ~ abbreviation, FUN = sum, data = coded_rw)

# Per-party mean of each coding variable. The formula method of aggregate()
# omits rows with missing values by default, so these means can rest on fewer
# rows than the N computed above.
speeches_rw_tab <- aggregate(
  cbind(rw_problem, rw_downplay, rw_othersexagg) ~ abbreviation,
  FUN = mean, data = coded_rw
)
speeches_lw_tab <- aggregate(
  cbind(lw_problem, lw_downplay, lw_othersexagg) ~ abbreviation,
  FUN = mean, data = coded_lw
)

# Combine N and means by party key rather than by row position, so the two
# aggregates stay aligned even if one of them lacks a party (that party then
# shows NA means). Columns are given readable names: party, N, three means.
rw_tab <- data.frame(
  abbreviation = rw_N$abbreviation,
  N = rw_N$one,
  speeches_rw_tab[
    match(rw_N$abbreviation, speeches_rw_tab$abbreviation),
    c("rw_problem", "rw_downplay", "rw_othersexagg")
  ],
  row.names = NULL
)
lw_tab <- data.frame(
  abbreviation = lw_N$abbreviation,
  N = lw_N$one,
  speeches_lw_tab[
    match(lw_N$abbreviation, speeches_lw_tab$abbreviation),
    c("lw_problem", "lw_downplay", "lw_othersexagg")
  ],
  row.names = NULL
)

# Round the three mean columns to two decimals.
rw_tab[, 3:5] <- round(rw_tab[, 3:5], 2)
lw_tab[, 3:5] <- round(lw_tab[, 3:5], 2)

###
### Table B.17: Coding of Keyword Use in Speeches
###
rw_tab
lw_tab



###
### Third validation exercise, based on manifesto segments surrounding keywords
###

# Clear everything left over from the previous exercise.
rm(list=ls())

# load and inspect data
coded <- read.csv("validation3.csv")
head(coded)

# set up data for analysis
# The factor levels fix the row order of the tables below. Any value of
# `partyname` that is not one of these six labels becomes NA and is
# therefore left out of the aggregates.
coded$partyname <- factor(
  coded$partyname,
  levels = c("Left", "Greens", "SPD", "FDP", "CDU/CSU", "AFD")
)
coded_rw <- subset(coded, type_kw == "right")
coded_lw <- subset(coded, type_kw == "left")

# Number of coded segments per party (sum of a constant 1).
coded_lw$one <- 1
coded_rw$one <- 1
lw_N <- aggregate(one ~ partyname, FUN = sum, data = coded_lw)
rw_N <- aggregate(one ~ partyname, FUN = sum, data = coded_rw)

# Per-party mean of each coding variable. The formula method of aggregate()
# omits rows with missing values by default, so these means can rest on fewer
# rows than the N computed above.
speeches_rw_tab <- aggregate(
  cbind(rw_problem, rw_downplay, rw_othersexagg) ~ partyname,
  FUN = mean, data = coded_rw
)
speeches_lw_tab <- aggregate(
  cbind(lw_problem, lw_downplay, lw_othersexagg) ~ partyname,
  FUN = mean, data = coded_lw
)

# Combine N and means by party key rather than by row position, so the two
# aggregates stay aligned even if one of them lacks a party (that party then
# shows NA means). Columns are given readable names: party, N, three means.
rw_tab <- data.frame(
  partyname = rw_N$partyname,
  N = rw_N$one,
  speeches_rw_tab[
    match(rw_N$partyname, speeches_rw_tab$partyname),
    c("rw_problem", "rw_downplay", "rw_othersexagg")
  ],
  row.names = NULL
)
lw_tab <- data.frame(
  partyname = lw_N$partyname,
  N = lw_N$one,
  speeches_lw_tab[
    match(lw_N$partyname, speeches_lw_tab$partyname),
    c("lw_problem", "lw_downplay", "lw_othersexagg")
  ],
  row.names = NULL
)

# Round the three mean columns to two decimals.
rw_tab[, 3:5] <- round(rw_tab[, 3:5], 2)
lw_tab[, 3:5] <- round(lw_tab[, 3:5], 2)

###
### Table B.18: Coding of Keyword Use in Manifestos
###
rw_tab
lw_tab